Admin

Define functions, directories, color palettes, inputs, etc here.

Load packages

library(sf)
library(measurements)
library(tidycensus)
library(tidyverse)
library(tmap)
library(lubridate)
library(knitr)
library(kableExtra)

Standard projection

proj <- 2246 # https://www.spatialreference.org/ref/epsg/2246/

Themes and Color Palettes

paletteY <- c("#F9F871","#FFD364","#FFAF6D","#FF8F80","#F87895", "D16BA5")
palette5 <- c("#25CB10", "#5AB60C", "#8FA108","#C48C04", "#FA7800")
plotTheme <- theme(
  plot.title =element_text(size=15),
  plot.subtitle = element_text(size=8),
  plot.caption = element_text(size = 6),
  axis.text.x = element_text(size = 10, hjust = 1),
  axis.text.y = element_text(size = 10),
  axis.title.y = element_text(size = 10),
  # Set the entire chart region to blank
  panel.background=element_blank(),
  plot.background=element_blank(),
  #panel.border=element_rect(colour="#F0F0F0"),
  # Format the grid
  panel.grid.major=element_line(colour="#D0D0D0",size=.2),
  axis.ticks=element_blank())

mapTheme <- function(base_size = 12) {
  theme(
    text = element_text( color = "black"),
    plot.title = element_text(size = 14,colour = "black"),
    plot.subtitle=element_text(face="italic"),
    plot.caption=element_text(hjust=0),
    axis.ticks = element_blank(),
    panel.background = element_blank(),axis.title = element_blank(),
    axis.text = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(colour = "black", fill=NA, size=2)
  )
}

Census Variables

census_df <- data.frame(vars =     c("B01003_001E", 
                                     "B19013_001E", 
                                     "B02001_002E", 
                                     "B08013_001E",
                                     "B08012_001E",
                                     "B08301_001E",
                                     "B08301_010E",
                                     "B01002_001E",
                                     "B08014_001E",
                                     "B08014_002E"),
                        
                        colNames = c("Total_Pop",
                                     "Med_Inc",
                                     "Med_Age",
                                     "White_Pop",
                                     "Vehicle_own_pop",
                                     "No_vehicle",
                                     "Total_Travel_Time",
                                     "Num_Commuters",
                                     "Means_of_Transport_pop",
                                     "Total_Public_Trans"),
                        stringsAsFactors = FALSE)

census_vars <- census_df$vars
census_colNames <- census_df$colNames

Helper Functions

rename_census_cols <- function(x){
  
  output <- x %>% 
    rename_at(vars(census_vars), 
              ~ census_colNames)

  output
}

Read/Prep Data

Louisville

Status Changes

rebalance_file <- paste(data_directory, 
                        "/Louisville-MDS-Status-Changes-2019Dec17.csv",
                        sep = "")

rebalance_data <- read_csv(rebalance_file) %>% 
  mutate(operators = ifelse(operators == "Bolt Lousiville", # correct typo in data
                            "Bolt Louisville",
                            operators),
         year = year(occurredAt),
         month = month(occurredAt),
         weekday = weekdays(occurredAt))

rebalance_data_sf <- st_as_sf(rebalance_data,
                              wkt = "location", 
                              crs = 4326) %>% 
  st_transform(proj)

Service Area

sa_file <- paste(data_directory, 
                        "/Dockless Vehicle Service Area/Dockless_Vehicle_Service_Area.shp",
                        sep = "")

sa <- read_sf(sa_file) %>% 
  st_transform(proj)

Census

#census data
LV_Census <- 
  get_acs(geography = "tract",
          variables = census_vars,
          year = 2018,
          state = "KY",
          geometry = TRUE,
          county = c("Jefferson"),
          output = "wide") %>%
  # rename(Total_Pop =  B01003_001E,
  #        Med_Inc = B19013_001E,
  #        Med_Age = B01002_001E,
  #        White_Pop = B02001_002E,
  #        Vehicle_own_pop = B08014_001E,
  #        No_vehicle = B08014_002E,
  #        Total_Travel_Time = B08013_001E,
  #        Num_Commuters = B08012_001E,
  #        Means_of_Transport_pop = B08301_001E,
  #        Total_Public_Trans = B08301_010E) %>%
  rename_census_cols() %>% 
  dplyr::select(
    # Total_Pop,
  #               Med_Inc, 
  #               White_Pop, 
  #               Total_Travel_Time,
  #               Means_of_Transport_pop, 
  #               Total_Public_Trans,
  #               Num_Commuters,
  #               Med_Age,
  #               Vehicle_own_pop,
  #               No_vehicle,
                census_colNames,
                GEOID, 
                geometry) %>%
  mutate(Percent_White = White_Pop / Total_Pop,
         Mean_Commute_Time = Total_Travel_Time / Num_Commuters,
         Percent_Taking_Public_Trans = Total_Public_Trans / Means_of_Transport_pop,
         Percent_vehicle_available = 1 - No_vehicle / Vehicle_own_pop) %>% 
  st_transform(proj)

Base Map

base_map <- st_read("https://opendata.arcgis.com/datasets/6e3dea8bd9cf49e6a764f7baa9141a95_30.geojson")

base_map_proj <- base_map %>% st_transform(proj)

Fishnet

1/10th of a square mile each

boundary <- st_union(base_map_proj) %>% st_sf()

cell_area <- conv_unit(0.5, from = "mi2", to = "ft2")
cell_size <- (cell_area * (2/3^0.5)) ^ 0.5 # the "cellsize" parameter is the distance between the centroids of each hexagonal cell.

lville_fishnet <- st_make_grid(boundary, cellsize = cell_size, square = FALSE) %>% 
  st_sf() %>% 
  mutate(fishnet_ID = row_number())

Austin

Census

ASTCensus <- 
  get_acs(geography = "tract", 
          variables = census_vars, 
          year = 2018, 
          state = "TX", 
          geometry = TRUE, 
          county = c("Travis"),
          output = "wide") %>%
  rename_census_cols() %>% 
  dplyr::select(census_colNames,
                GEOID, 
                geometry) %>%
  mutate(Percent_White = White_Pop / Total_Pop,
         Mean_Commute_Time = Total_Travel_Time / Num_Commuters,
         Percent_Taking_Public_Trans = Total_Public_Trans / Means_of_Transport_pop,
         Percent_vehicle_available = 1 - No_vehicle / Vehicle_own_pop) %>% 
  st_transform(proj)

ASTTracts <- 
  ASTCensus %>%
  as.data.frame() %>%
  distinct(GEOID, .keep_all = TRUE) %>%
  select(GEOID, geometry) %>% 
  st_sf() %>% 
  st_transform(proj)

Trips

Subset to June 2019

Austin_file <- paste(data_directory,
                     "/Shared_Micromobility_Vehicle_Trips_austin.csv",
                     sep = "")

dat_Aus <- read_csv(Austin_file)

dat_Aus_2019 <- dat_Aus %>%
  na.omit() %>%
  filter((Year == 2019) &
         (`Vehicle Type` == 'scooter'))
rm(dat_Aus)

dat_Aus_2019_june <- dat_Aus_2019 %>%
  filter(Month == 6)

Remove outliers and keep only trips in Travis County. Only 19 trips were outside of the county.

dat_Aus_2019_june$`Census Tract Start` <- as.character(dat_Aus_2019_june$`Census Tract Start`)
dat_Aus_2019_june$`Census Tract End` <- as.character(dat_Aus_2019_june$`Census Tract End`)
dat_Aus_2019_june_start <- merge(dat_Aus_2019_june, ASTTracts, all.x=F, by.x='Census Tract Start', by.y='GEOID')

Add Time Interval

dat_Aus_2019_june_start$`Start Time` <- as.POSIXct(dat_Aus_2019_june_start$`Start Time`, format='%m/%d/%Y %I:%M:%S %p')
#dat_Aus_2019_june_end$`End Time` <- as.POSIXct(dat_Aus_2019_june_end$`End Time`, format='%m/%d/%Y %I:%M:%S %p')
dat_Aus_2019_june_start <- dat_Aus_2019_june_start %>%
  mutate(interval60 = floor_date(ymd_hms(`Start Time`), unit = "hour"),
         #interval15 = floor_date(ymd_hms(starttime), unit = "15 mins"),
         week = week(interval60))
dat_Aus_2019_june_start$`Day of Week` <- weekdays(dat_Aus_2019_june_start$interval60)
dat_Aus_2019_june_start$`Day of Week` <- factor(dat_Aus_2019_june_start$`Day of Week`,
                                                level=c('Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday'))

Scooter Turnover

sctTov_austin_dow <- dat_Aus_2019_june_start %>%
  group_by(week,`Day of Week`) %>%
  summarise(cnt=n(), turnover=cnt/length(unique(`Device ID`)))

sctTov_austin_dow <- sctTov_austin_dow %>%
  group_by(`Day of Week`)%>%
  summarise(turnover_m=mean(turnover))

sctTov_austin_hour <- dat_Aus_2019_june_start %>%
  group_by(Hour) %>%
  summarise(cnt=n(), turnover=cnt/length(unique(`Device ID`))/length(unique(date(interval60))))

Explore and Visualize Data

Louisville

Distribution of Scooter Status Change Activities

activity_distro_plot <- rebalance_data %>% 
  ggplot(aes(x = reason)) +
  geom_bar(stat = "count", position = "identity") +
  facet_wrap(~ type, scales = "free") +
  coord_flip() +
  labs(x = "Reason for Status Change",
       y = "Count",
       title = "Distribution of Scooter Status Change Activities")

activity_distro_plot

Frequency of Rebalancing

rebalance_only <- rebalance_data_sf %>% 
  filter(str_detect(reason, "rebalance"))
rebalance_only <- rebalance_only[base_map_proj,] #intersect data

users_only <- rebalance_data_sf %>% 
  filter(str_detect(reason, "user"))
users_only <- users_only[base_map_proj,] #intersect data
rps <- rebalance_only %>% 
  group_by(year, month) %>% 
  summarise(cnt = n(), 
            per = cnt / length(unique(vehicleId))) %>% 
  ungroup() %>% 
  mutate(year_mon = zoo::as.Date(zoo::as.yearmon(paste(year, month), "%Y %m"),
                            frac = 0))

Monthly Rebalances per Scooter

ggplot(data = rps %>% filter(year == 2019), aes(x = year_mon, 
                                                per, 
                                                group = 1))+
  geom_line(size = 1) +
  plotTheme

By day of week in June

#by day of week in June
rps_dow <- rebalance_only %>% 
  group_by(month, weekday) %>% 
  summarise(cnt = n(), 
            per = cnt / length(unique(vehicleId))) %>%
  ungroup() %>% 
  group_by(weekday) %>% 
  summarise(perd = mean(per)) %>% 
  mutate(weekday = factor(weekday,
                          level = c('Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday')))
ggplot(data = rps_dow, 
       aes(x = weekday, 
           y = perd, 
           group = 1))+
  geom_line(size = 1) +
  plotTheme

Rebalancing by Provider

contribution <- as.data.frame(prop.table(table(rebalance_only$operators)),
                              stringsAsFactors = FALSE) %>% 
  rename(Provider = Var1,
         Proportion = Freq) %>%
  mutate(Proportion = round(Proportion, 2)) %>% 
  rbind(c('Bird Louisville', 0),
        c('Spin Louisville',0))

contribution %>% 
  kable(caption = "Contribution by providers") %>%
  kable_styling("striped", full_width = F,position = "center") %>%
  row_spec(2, color = "white", background = "#33638dff") %>%
  row_spec(4, color = "white", background = "#33638dff")
Contribution by providers
Provider Proportion
Bolt Louisville 0.16
Lime Louisville 0.84
Bird Louisville 0
Spin Louisville 0

Geographic Distribution of Status Change Activities

Scooters tend to be rebalanced from all over Louisville to the waterfront and Old Louisville.

ggplot() +
  geom_sf(data = base_map_proj, fill = NA, color = "lightgray") +
  geom_sf(data = rebalance_only, 
          aes(color = reason),
          alpha = 0.1) +
  facet_wrap(~ reason) +
  theme_minimal()

rebalance_pickups <- rebalance_only %>% 
  dplyr::select(reason) %>% 
  filter(reason == "rebalance pick up")

rebalance_dropoffs <- rebalance_only %>% 
  dplyr::select(reason) %>% 
  filter(reason == "rebalance drop off")

Rebalance Pickups

tmap_mode("view")

tm_shape(rebalance_pickups %>% sample_n(10000)) +
  tm_dots(col = "red",
          alpha = 0.2)

Rebalance Dropoffs

tm_shape(rebalance_dropoffs %>% sample_n(10000)) +
  tm_dots(col = "blue",
          alpha = 0.2)

Trips in June 2019 Only

Limit to June 2019, remove trips outside service area

rebalance_june <- rebalance_data_sf %>% 
  filter(year == 2019,
         month == 6)

rebalance_june_sa <- st_intersection(rebalance_june, sa)
ggplot()+
  geom_sf(data = rebalance_june_sa)+
  geom_sf(data = sa, fill = 'transparent') +
  mapTheme()

Fishnet

lville_fishnet2 <- lville_fishnet %>% 
  mutate(pickups = lengths(st_intersects(., rebalance_pickups)),
         dropoffs = lengths(st_intersects(., rebalance_dropoffs))) %>% 
  gather(key = "Event", value = "Count", pickups:dropoffs)
ggplot() +
  # geom_sf(data = base_map_proj, fill = NA, color = "lightgray") +
  geom_sf(data = lville_fishnet2, 
          aes(fill = log(Count + 1)),
          alpha = 1) +
  scale_fill_continuous(high = "#132B43", low = "#56B1F7") +
  facet_wrap(~ Event) +
  theme_minimal() +
  labs(subtitle = "Note these are log-transformed") +
  mapTheme()

Census Variables

  • Percent White
  • Mean Commute Time
  • Percent Public Transit Riders
  • Percent with a Vehicle

histograms

LV_Census_2 <- LV_Census %>% 
  mutate(Percent_White_quintile = ntile(Percent_White, 5),
         Percent_Taking_Public_Trans_quintile = ntile(Percent_Taking_Public_Trans, 5),
         Percent_vehicle_quintile = ntile(Percent_vehicle_available, 5)) %>%
  dplyr::select(GEOID,
                Percent_White,
                Mean_Commute_Time,
                Percent_Taking_Public_Trans,
                Percent_vehicle_available,
                Percent_White_quintile,
                Percent_Taking_Public_Trans_quintile,
                Percent_vehicle_quintile
                ) %>%
  gather(key = "variable",
         value = "value",
         Percent_White:Percent_vehicle_quintile)

LV_Census_histogram <- LV_Census_2 %>% 
  filter(!str_detect(variable, "quintile")) %>% 
  ggplot(aes(x = value)) +
  geom_histogram(bins = 50) +
  facet_wrap(~ variable, 
             scales = "free")

LV_Census_histogram

maps by quintile

LV_Census_map <- ggplot() +
  geom_sf(data = LV_Census_2 %>% filter(str_detect(variable, "quintile")),
          aes(fill = value)) +
  scale_fill_continuous(high = "#132B43", low = "#56B1F7") +
  facet_wrap(~ variable, ncol = 1)

LV_Census_map

Austin

Time Pattern for Trips

By hour

ggplot(dat_Aus_2019_june_start %>%
         group_by(interval60) %>%
         tally())+
  geom_line(aes(x = interval60, y = n, group=1),size=.7)+
  labs(title="Scooter trips per hr. Austin, June, 2019",
       x="Date", 
       y="Number of trips") +
  plotTheme

By day of week

# by day of week
ggplot(data=dat_Aus_2019_june_start) +
  geom_freqpoly(aes(Hour, col=`Day of Week`), binwidth=1) +
  labs(title="Scooter trips in Austin by hour, by day of the week, June, 2019",
       x="Hour", 
       y="Trip Counts")+
  scale_color_viridis_d(direction = -1,
                      option = "D")+
  plotTheme

Turnover

Counts

ggplot(data=sctTov_austin_dow, aes(`Day of Week`, turnover_m, group=1))+
  geom_line(size=1) +
  labs(title="Turnover rate of scooter in Austin by day of the week, June, 2019",
       x="Hour", 
       y="Trip Counts")+
  plotTheme

Rate

ggplot(data=sctTov_austin_hour, aes(Hour, turnover, group=1))+
  geom_line(size=1) +
  labs(title="Turnover rate of scooter in Austin by day of the week, June, 2019",
       x="Hour", 
       y="Turnoverrate")+
  plotTheme